home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Amiga Format CD 42
/
Amiga Format AFCD42 (Issue 126, Aug 1999).iso
/
-serious-
/
programming
/
other
/
jikes
/
src
/
diagnose.cpp
< prev
next >
Wrap
C/C++ Source or Header
|
1999-05-14
|
97KB
|
2,500 lines
// $Id: diagnose.cpp,v 1.5 1999/02/24 19:40:14 shields Exp $
//
// This software is subject to the terms of the IBM Jikes Compiler
// License Agreement available at the following URL:
// http://www.ibm.com/research/jikes.
// Copyright (C) 1996, 1998, International Business Machines Corporation
// and others. All Rights Reserved.
// You must accept the terms of that agreement to use this software.
//
#include "config.h"
#include <assert.h>
#include <iostream.h>
#include "diagnose.h"
#include "control.h"
#include "semantic.h"
#include "unicode.h"
#include "case.h"
#include "spell.h"
void DiagnoseParser::ReallocateStacks()
{
int old_stack_length = stack_length;
stack_length += STACK_INCREMENT;
assert(stack_length <= SHRT_MAX);
int *old_stack = stack;
stack = (int *) memmove(new int[stack_length], stack, old_stack_length * sizeof(int));
delete [] old_stack;
Location *old_location_stack = location_stack;
location_stack = (Location *) memmove(new Location[stack_length], location_stack, old_stack_length * sizeof(Location));
delete [] old_location_stack;
Ast **old_parse_stack = parse_stack;
parse_stack = (Ast **) memmove(new Ast *[stack_length], parse_stack, old_stack_length * sizeof(Ast *));
delete [] old_parse_stack;
int *old_temp_stack = temp_stack;
temp_stack = (int *) memmove(new int[stack_length], temp_stack, old_stack_length * sizeof(int));
delete [] old_temp_stack;
int *old_next_stack = next_stack;
next_stack = (int *) memmove(new int[stack_length], next_stack, old_stack_length * sizeof(int));
delete [] old_next_stack;
int *old_prev_stack = prev_stack;
prev_stack = (int *) memmove(new int[stack_length], prev_stack, old_stack_length * sizeof(int));
delete [] old_prev_stack;
int *old_scope_index = scope_index;
scope_index = (int *) memmove(new int[stack_length], scope_index, old_stack_length * sizeof(int));
delete [] old_scope_index;
int *old_scope_position = scope_position;
scope_position = (int *) memmove(new int[stack_length], scope_position, old_stack_length * sizeof(int));
delete [] old_scope_position;
return;
}
void DiagnoseParser::DiagnoseParse()
{
lex_stream -> Reset();
TokenObject curtok = lex_stream -> Gettoken();
int prev_pos,
pos,
next_pos,
i,
tok,
lhs_symbol,
act = START_STATE,
current_kind = lex_stream -> Kind(curtok);
ReallocateStacks();
/*****************************************************************/
/* Start parsing. */
/*****************************************************************/
state_stack_top = 0;
stack[state_stack_top] = act;
tok = lex_stream -> Kind(curtok);
location_stack[state_stack_top] = Loc(curtok);
//
// Process a terminal
//
do
{
/*************************************************************/
/* Synchronize state stacks and update the location stack. */
/*************************************************************/
prev_pos = -1;
prev_stack_top = -1;
next_pos = -1;
next_stack_top = -1;
pos = state_stack_top;
temp_stack_top = state_stack_top - 1;
for (i = 0; i <= state_stack_top; i++)
temp_stack[i] = stack[i];
act = t_action(act, tok, lex_stream);
/*****************************************************************/
/* When a reduce action is encountered, we compute all REDUCE */
/* and associated goto actions induced by the current token. */
/* Eventually, a SHIFT, SHIFT-REDUCE, ACCEPT or ERROR action is */
/* computed... */
/*****************************************************************/
while (act <= NUM_RULES)
{
do
{
temp_stack_top -= (rhs[act]-1);
//
// if no error detected?
// semantic_action(act);
//
act = nt_action(temp_stack[temp_stack_top], lhs[act]);
} while(act <= NUM_RULES);
/*************************************************/
/* ... Update the maximum useful position of the */
/* (STATE_)STACK, push goto state into stack, and*/
/* compute next action on current symbol ... */
/*************************************************/
if (temp_stack_top + 1 >= stack_length)
ReallocateStacks();
pos = Min(pos, temp_stack_top);
temp_stack[temp_stack_top + 1] = act;
act = t_action(act, tok, lex_stream);
}
/*****************************************************************/
/* At this point, we have a shift, shift-reduce, accept or error */
/* action. STACK contains the configuration of the state stack */
/* prior to executing any action on curtok. next_stack contains */
/* the configuration of the state stack after executing all */
/* reduce actions induced by curtok. The variable pos indicates */
/* the highest position in STACK that is still useful after the */
/* reductions are executed. */
/*****************************************************************/
while(act > ERROR_ACTION || /* SHIFT-REDUCE action */
act < ACCEPT_ACTION) /* SHIFT action */
{
next_stack_top = temp_stack_top + 1;
for (i = next_pos + 1; i <= next_stack_top; i++)
next_stack[i] = temp_stack[i];
for (i = pos + 1; i <= next_stack_top; i++)
location_stack[i] = location_stack[state_stack_top];
/*****************************************************/
/* If we have a shift-reduce, process it as well as */
/* the goto-reduce actions that follow it. */
/*****************************************************/
if (act > ERROR_ACTION)
{
act -= ERROR_ACTION;
do
{
next_stack_top -= (rhs[act]-1);
//
// if no error detected?
// semantic_action(act);
//
act = nt_action(next_stack[next_stack_top], lhs[act]);
} while(act <= NUM_RULES);
pos = Min(pos, next_stack_top);
}
if (next_stack_top + 1 >= stack_length)
ReallocateStacks();
temp_stack_top = next_stack_top;
next_stack[++next_stack_top] = act;
next_pos = next_stack_top;
/********************************************************/
/* Simulate the parser through the next token without */
/* destroying STACK or next_stack. */
/********************************************************/
curtok = lex_stream -> Gettoken();
tok = lex_stream -> Kind(curtok);
act = t_action(act, tok, lex_stream);
while(act <= NUM_RULES)
{
/*************************************************/
/* ... Process all goto-reduce actions following */
/* reduction, until a goto action is computed ...*/
/*************************************************/
do
{
lhs_symbol = lhs[act];
temp_stack_top -= (rhs[act]-1);
//
// if no error detected?
// semantic_action(act);
//
act = (temp_stack_top > next_pos
? temp_stack[temp_stack_top]
: next_stack[temp_stack_top]);
act = nt_action(act, lhs_symbol);
} while(act <= NUM_RULES);
/*************************************************/
/* ... Update the maximum useful position of the */
/* (STATE_)STACK, push GOTO state into stack, and*/
/* compute next action on current symbol ... */
/*************************************************/
if (temp_stack_top + 1 >= stack_length)
ReallocateStacks();
next_pos = Min(next_pos, temp_stack_top);
temp_stack[temp_stack_top + 1] = act;
act = t_action(act, tok, lex_stream);
}
/*****************************************************/
/* No error was detected, Read next token into */
/* PREVTOK element, advance CURTOK pointer and */
/* update stacks. */
/*****************************************************/
if (act != ERROR_ACTION)
{
prev_stack_top = state_stack_top;
for (i = prev_pos + 1; i <= prev_stack_top; i++)
prev_stack[i] = stack[i];
prev_pos = pos;
state_stack_top = next_stack_top;
for (i = pos + 1; i <= state_stack_top; i++)
stack[i] = next_stack[i];
location_stack[state_stack_top] = Loc(curtok);
pos = next_pos;
}
}
/*********************************************************/
/* At this stage, either we have an ACCEPT or an ERROR */
/* action. */
/*********************************************************/
if (act == ERROR_ACTION)
{
//
// An error was detected.
//
RepairCandidate candidate = ErrorRecovery(curtok);
act = stack[state_stack_top];
/*****************************************************************/
/* If the recovery was successful on a nonterminal candidate, */
/* parse through that candidate and "read" the next token. */
/*****************************************************************/
if (!candidate.symbol)
break;
else if (candidate.symbol > NT_OFFSET)
{
lhs_symbol = candidate.symbol - NT_OFFSET;
act = nt_action(act, lhs_symbol);
while(act <= NUM_RULES)
{
state_stack_top -= (rhs[act]-1);
act = nt_action(stack[state_stack_top], lhs[act]);
}
stack[++state_stack_top] = act;
curtok = lex_stream -> Gettoken();
tok = lex_stream -> Kind(curtok);
location_stack[state_stack_top] = Loc(curtok);
}
else
{
tok = candidate.symbol;
location_stack[state_stack_top] = candidate.location;
}
}
} while (act != ACCEPT_ACTION);
error.PrintMessages();
return;
}
/*****************************************************************/
/* This routine is invoked when an error is encountered. It */
/* tries to diagnose the error and recover from it. If it is */
/* successful, the state stack, the current token and the buffer */
/* are readjusted; i.e., after a successful recovery, */
/* state_stack_top points to the location in the state stack */
/* that contains the state on which to recover; curtok */
/* identifies the symbol on which to recover. */
/* */
/* Up to three configurations may be available when this routine */
/* is invoked. PREV_STACK may contain the sequence of states */
/* preceding any action on prevtok, STACK always contains the */
/* sequence of states preceding any action on curtok, and */
/* NEXT_STACK may contain the sequence of states preceding any */
/* action on the successor of curtok. */
/*****************************************************************/
RepairCandidate DiagnoseParser::ErrorRecovery(TokenObject error_token)
{
TokenObject prevtok = lex_stream -> Previous(error_token);
RepairCandidate candidate;
/*****************************************************************/
/* Try primary phase recoveries. If not successful, try secondary*/
/* phase recoveries. If not successful and we are at end of the */
/* file, we issue the end-of-file error and quit. Otherwise, ... */
/*****************************************************************/
candidate = PrimaryPhase(error_token);
if (candidate.symbol)
return candidate;
candidate = SecondaryPhase(error_token);
if (candidate.symbol)
return candidate;
if (lex_stream -> Kind(error_token) == EOFT_SYMBOL)
{
error.Report(0, EOF_CODE, terminal_index[EOFT_SYMBOL],
Loc(prevtok), prevtok);
candidate.symbol = 0;
candidate.location = Loc(error_token);
return candidate;
}
/*****************************************************************/
/* At this point, primary and (initial attempt at) secondary */
/* recovery did not work. We will now get into "panic mode" and */
/* keep trying secondary phase recoveries until we either find */
/* a successful recovery or have consumed the remaining input */
/* tokens. */
/*****************************************************************/
while(lex_stream -> Kind(buffer[BUFF_UBOUND]) != EOFT_SYMBOL)
{
candidate = SecondaryPhase(buffer[MAX_DISTANCE - MIN_DISTANCE + 2]);
if (candidate.symbol)
return candidate;
}
/*****************************************************************/
/* We reached the end of the file while panicking. Delete all */
/* remaining tokens in the input. */
/*****************************************************************/
int i;
for (i = BUFF_UBOUND; lex_stream -> Kind(buffer[i]) == EOFT_SYMBOL; i--)
;
error.Report(0, DELETION_CODE, terminal_index[lex_stream -> Kind(prevtok)],
Loc(error_token), buffer[i]);
candidate.symbol = 0;
candidate.location = Loc(buffer[i]);
return candidate;
}
/*****************************************************************/
/* This function tries primary and scope recovery on each */
/* available configuration. If a successful recovery is found */
/* and no secondary phase recovery can do better, a diagnosis is */
/* issued, the configuration is updated and the function returns */
/* "true". Otherwise, it returns "false". */
/*****************************************************************/
RepairCandidate DiagnoseParser::PrimaryPhase(TokenObject error_token)
{
PrimaryRepairInfo repair,
new_repair;
RepairCandidate candidate;
repair.distance = 0;
repair.misspell_index = 0;
repair.code = 0;
candidate.symbol = 0;
/*****************************************************************/
/* Initialize the buffer. */
/*****************************************************************/
int i = (next_stack_top >= 0 ? 3 : 2);
buffer[i] = error_token;
int k;
for (k = i; k > 0; k--)
buffer[k - 1] = lex_stream -> Previous(buffer[k]);
for (k = i + 1; k < BUFF_SIZE; k++)
buffer[k] = lex_stream -> Next(buffer[k - 1]);
/*****************************************************************/
/* If NEXT_STACK_TOP > 0 then the parse was successful on CURTOK */
/* and the error was detected on the successor of CURTOK. In */
/* that case, first check whether or not primary recovery is */
/* possible on next_stack ... */
/*****************************************************************/
if (next_stack_top >= 0)
{
repair.buffer_position = 3;
repair = CheckPrimaryDistance(next_stack, next_stack_top, repair);
}
/*****************************************************************/
/* ... Next, try primary recovery on the current token... */
/*****************************************************************/
new_repair = repair;
new_repair.buffer_position = 2;
new_repair = CheckPrimaryDistance(stack, state_stack_top, new_repair);
if (new_repair.distance > repair.distance ||
new_repair.misspell_index > repair.misspell_index)
repair = new_repair;
/*****************************************************************/
/* Finally, if prev_stack_top >= 0 then try primary recovery on */
/* the prev_stack configuration. */
/*****************************************************************/
if (prev_stack_top >= 0)
{
new_repair = repair;
new_repair.buffer_position = 1;
new_repair = CheckPrimaryDistance(prev_stack,
prev_stack_top, new_repair);
if (new_repair.distance > repair.distance ||
new_repair.misspell_index > repair.misspell_index)
repair = new_repair;
}
/*****************************************************************/
/* Before accepting the best primary phase recovery obtained, */
/* ensure that we cannot do better with a similar secondary */
/* phase recovery. */
/*****************************************************************/
if (next_stack_top >= 0) /* next_stack available */
{
if (SecondaryCheck(next_stack,next_stack_top,3,repair.distance))
return candidate;
}
else if (SecondaryCheck(stack, state_stack_top, 2, repair.distance))
return candidate;
/*****************************************************************/
/* First, adjust distance if the recovery is on the error token; */
/* it is important that the adjustment be made here and not at */
/* each primary trial to prevent the distance tests from being */
/* biased in favor of deferred recoveries which have access to */
/* more input tokens... */
/*****************************************************************/
repair.distance = repair.distance - repair.buffer_position + 1;
/*****************************************************************/
/* ...Next, adjust the distance if the recovery is a deletion or */
/* (some form of) substitution... */
/*****************************************************************/
if (repair.code == INVALID_CODE ||
repair.code == DELETION_CODE ||
repair.code == SUBSTITUTION_CODE ||
repair.code == MERGE_CODE)
repair.distance--;
/*****************************************************************/
/* ... After adjustment, check if the most successful primary */
/* recovery can be applied. If not, continue with more radical */
/* recoveries... */
/*****************************************************************/
if (repair.distance < MIN_DISTANCE)
return candidate;
/*****************************************************************/
/* When processing an insertion error, if the token preceeding */
/* the error token is not available, we change the repair code */
/* into a BEFORE_CODE to instruct the reporting routine that it */
/* indicates that the repair symbol should be inserted before */
/* the error token. */
/*****************************************************************/
if (repair.code == INSERTION_CODE)
{
if (! lex_stream -> Kind(buffer[repair.buffer_position - 1]))
repair.code = BEFORE_CODE;
}
/*****************************************************************/
/* Select the proper sequence of states on which to recover, */
/* update stack accordingly and call diagnostic routine. */
/*****************************************************************/
if (repair.buffer_position == 1)
{
state_stack_top = prev_stack_top;
for (i = 0; i <= state_stack_top; i++)
stack[i] = prev_stack[i];
}
else if (next_stack_top >= 0 && repair.buffer_position >= 3)
{
state_stack_top = next_stack_top;
for (i = 0; i <= state_stack_top; i++)
stack[i] = next_stack[i];
location_stack[state_stack_top] = Loc(buffer[3]);
}
return PrimaryDiagnosis(repair);
}
/*****************************************************************/
/* This function checks whether or not a given state has a */
/* candidate, whose string representaion is a merging of the two */
/* tokens at positions buffer_position and buffer_position+1 in */
/* the buffer. If so, it returns the candidate in question; */
/* otherwise it returns 0. */
/*****************************************************************/
bool DiagnoseParser::MergeCandidate(int state, int buffer_position)
{
int i,
k,
len,
len1,
len2;
const wchar_t *p1,
*p2;
wchar_t str[MAX_TERM_LENGTH + 1];
len1 = 0;
for (p1 = lex_stream -> Name(buffer[buffer_position]); *p1; p1++)
len1++;
len2 = 0;
for (p2 = lex_stream -> Name(buffer[buffer_position + 1]); *p2; p2++)
len2++;
len = len1 + len2;
p1 = lex_stream -> Name(buffer[buffer_position]);
p2 = lex_stream -> Name(buffer[buffer_position + 1]);
if (len <= MAX_TERM_LENGTH)
{
wchar_t *p0;
p0 = &str[0];
for (i = 0; i < len1; i++)
*(p0++) = p1[i];
for (i = 0; i < len2; i++)
*(p0++) = p2[i];
*p0 = U_NULL;
for (i = asi(state); asr[i] != 0; i++)
{
k = terminal_index[asr[i]];
if (len == name_length[k])
{
const char *p3 = &string_buffer[name_start[k]];
p0 = &str[0];
while (*p0 != U_NULL)
{
wchar_t c = *(p3++);
#ifdef EBCDIC
c = Code::ToASCII(c);
#endif
if (Case::ToAsciiLower(*p0) != Case::ToAsciiLower(c))
break;
p0++;
}
if (*p0 == U_NULL)
return asr[i];
}
}
}
return 0;
}
/*****************************************************************/
/* This procedure takes as arguments a parsing configuration */
/* consisting of a state stack (stack and stack_top) and a fixed */
/* number of input tokens (starting at buffer_position) in the */
/* input BUFFER; and some reference arguments: repair_code, */
/* distance, misspell_index, candidate, and stack_position */
/* which it sets based on the best possible recovery that it */
/* finds in the given configuration. The effectiveness of a */
/* a repair is judged based on two criteria: */
/* */
/* 1) the number of tokens that can be parsed after the repair */
/* is applied: distance. */
/* 2) how close to perfection is the candidate that is chosen: */
/* misspell_index. */
/* When this procedure is entered, distance, misspell_index and */
/* repair_code are assumed to be initialized. */
/*****************************************************************/
PrimaryRepairInfo DiagnoseParser::CheckPrimaryDistance
(int stck[], int stack_top, PrimaryRepairInfo repair)
{
PrimaryRepairInfo scope_repair;
int i,
j,
k,
next_state,
max_pos,
act,
root,
symbol,
tok;
/*****************************************************************/
/* First, try scope and manual recovery. */
/*****************************************************************/
#if defined(SCOPE_REPAIR)
scope_repair = ScopeTrial(stck, stack_top, repair);
if (scope_repair.distance > repair.distance)
repair = scope_repair;
#endif
/*****************************************************************/
/* Next, try merging the error token with its successor. */
/*****************************************************************/
symbol = MergeCandidate(stck[stack_top], repair.buffer_position);
if (symbol != 0)
{
j = ParseCheck(stck, stack_top,
symbol, repair.buffer_position+2);
if ((j > repair.distance) ||
(j == repair.distance && repair.misspell_index < 10))
{
repair.misspell_index = 10;
repair.symbol = symbol;
repair.distance = j;
repair.code = MERGE_CODE;
}
}
/*****************************************************************/
/* Next, try deletion of the error token. */
/*****************************************************************/
j = ParseCheck(stck, stack_top,
lex_stream -> Kind(buffer[repair.buffer_position+1]),
repair.buffer_position+2);
if (lex_stream -> Kind(buffer[repair.buffer_position]) == EOLT_SYMBOL &&
lex_stream -> AfterEol(buffer[repair.buffer_position+1]))
k = 10;
else k = 0;
if (j > repair.distance || (j == repair.distance && k > repair.misspell_index))
{
repair.misspell_index = k;
repair.code = DELETION_CODE;
repair.distance = j;
}
/*****************************************************************/
/* Update the error configuration by simulating all reduce and */
/* goto actions induced by the error token. Then assign the top */
/* most state of the new configuration to next_state. */
/*****************************************************************/
next_state = stck[stack_top];
max_pos = stack_top;
temp_stack_top = stack_top - 1;
tok = lex_stream -> Kind(buffer[repair.buffer_position]);
lex_stream -> Reset(buffer[repair.buffer_position + 1]);
act = t_action(next_state, tok, lex_stream);
while(act <= NUM_RULES)
{
do
{
temp_stack_top -= (rhs[act]-1);
symbol = lhs[act];
act = (temp_stack_top > max_pos
? temp_stack[temp_stack_top]
: stck[temp_stack_top]);
act = nt_action(act, symbol);
} while(act <= NUM_RULES);
max_pos = Min(max_pos, temp_stack_top);
temp_stack[temp_stack_top + 1] = act;
next_state = act;
act = t_action(next_state, tok, lex_stream);
}
/*****************************************************************/
/* Next, place the list of candidates in proper order. */
/*****************************************************************/
root = 0;
for (i = asi(next_state); asr[i] != 0; i++)
{
symbol = asr[i];
if (symbol != EOFT_SYMBOL && symbol != ERROR_SYMBOL)
{
if (root == 0)
list[symbol] = symbol;
else
{
list[symbol] = list[root];
list[root] = symbol;
}
root = symbol;
}
}
if (stck[stack_top] != next_state)
{
for (i = asi(stck[stack_top]); asr[i] != 0; i++)
{
symbol = asr[i];
if (symbol != EOFT_SYMBOL && symbol != ERROR_SYMBOL &&
list[symbol] == 0)
{
if (root == 0)
list[symbol] = symbol;
else
{
list[symbol] = list[root];
list[root] = symbol;
}
root = symbol;
}
}
}
i = list[root];
list[root] = 0;
root = i;
/*****************************************************************/
/* Next, try insertion for each possible candidate available in */
/* the current state, except EOFT and ERROR_SYMBOL. */
/*****************************************************************/
symbol = root;
while(symbol != 0)
{
if (symbol == EOLT_SYMBOL &&
lex_stream -> AfterEol(buffer[repair.buffer_position]))
k = 10;
else k = 0;
j = ParseCheck(stck, stack_top,
symbol, repair.buffer_position);
if (j > repair.distance)
{
repair.misspell_index = k;
repair.distance = j;
repair.symbol = symbol;
repair.code = INSERTION_CODE;
}
else if (j == repair.distance && k > repair.misspell_index)
{
repair.misspell_index = k;
repair.distance = j;
repair.symbol = symbol;
repair.code = INSERTION_CODE;
}
symbol = list[symbol];
}
/*****************************************************************/
/* Next, Try substitution for each possible candidate available */
/* in the current state, except EOFT and ERROR_SYMBOL. */
/*****************************************************************/
symbol = root;
while(symbol != 0)
{
if (symbol == EOLT_SYMBOL &&
lex_stream -> AfterEol(buffer[repair.buffer_position+1]))
k = 10;
else k = Misspell(symbol, buffer[repair.buffer_position]);
j = ParseCheck(stck, stack_top,
symbol, repair.buffer_position+1);
if (j > repair.distance)
{
repair.misspell_index = k;
repair.distance = j;
repair.symbol = symbol;
repair.code = SUBSTITUTION_CODE;
}
else if (j == repair.distance && k > repair.misspell_index)
{
repair.misspell_index = k;
repair.symbol = symbol;
repair.code = SUBSTITUTION_CODE;
}
i = symbol;
symbol = list[symbol];
list[i] = 0; /* reset element */
}
/*****************************************************************/
/* Next, we try to insert a nonterminal candidate in front of the*/
/* error token, or substituting a nonterminal candidate for the */
/* error token. Precedence is given to insertion. */
/*****************************************************************/
for (i = nasi(stck[stack_top]); nasr[i] != 0; i++)
{
symbol = nasr[i] + NT_OFFSET;
j = ParseCheck(stck, stack_top,
symbol, repair.buffer_position+1);
if (j > repair.distance)
{
repair.misspell_index = 0;
repair.distance = j;
repair.symbol = symbol;
repair.code = INVALID_CODE;
}
j = ParseCheck(stck, stack_top, symbol, repair.buffer_position);
if ((j > repair.distance) ||
(j == repair.distance && repair.code == INVALID_CODE))
{
repair.misspell_index = 0;
repair.distance = j;
repair.symbol = symbol;
repair.code = INSERTION_CODE;
}
}
return repair;
}
/*****************************************************************/
/* This procedure is invoked to issue a diagnostic message and */
/* adjust the input buffer. The recovery in question is either */
/* the insertion of one or more scopes, the merging of the error */
/* token with its successor, the deletion of the error token, */
/* the insertion of a single token in front of the error token */
/* or the substitution of another token for the error token. */
/*****************************************************************/
RepairCandidate DiagnoseParser::PrimaryDiagnosis(PrimaryRepairInfo repair)
{
RepairCandidate candidate;
int name_index;
/*****************************************************************/
/* Issue diagnostic. */
/*****************************************************************/
TokenObject prevtok = buffer[repair.buffer_position - 1],
curtok = buffer[repair.buffer_position];
switch(repair.code)
{
case INSERTION_CODE: case BEFORE_CODE:
{
if (repair.symbol > NT_OFFSET)
name_index = GetNtermIndex(stack[state_stack_top],
repair.symbol,
repair.buffer_position);
else name_index = GetTermIndex(stack,
state_stack_top,
repair.symbol,
repair.buffer_position);
//
// TokenObject t = curtok;
// if (repair.code == INSERTION_CODE)
// {
// if (repair.symbol != EOLT_SYMBOL && lex_stream -> AfterEol(curtok))
// repair.code = BEFORE_CODE;
// else t = prevtok;
// }
//
// error.report(0, repair.code, name_index, Loc(t), t);
//
TokenObject t = (repair.code == INSERTION_CODE ? prevtok : curtok);
error.Report(0, repair.code, name_index, Loc(t), t);
break;
}
case INVALID_CODE:
{
name_index = GetNtermIndex(stack[state_stack_top],
repair.symbol,
repair.buffer_position + 1);
error.Report(0, repair.code, name_index, Loc(curtok), curtok);
break;
}
case SUBSTITUTION_CODE:
{
if (repair.misspell_index >= 6)
name_index = terminal_index[repair.symbol];
else
{
name_index = GetTermIndex(stack, state_stack_top,
repair.symbol,
repair.buffer_position + 1);
if (name_index != terminal_index[repair.symbol])
repair.code = INVALID_CODE;
}
error.Report(0, repair.code, name_index, Loc(curtok), curtok);
break;
}
case MERGE_CODE:
{
error.Report(0, repair.code, terminal_index[repair.symbol],
Loc(curtok), lex_stream -> Next(curtok));
break;
}
#if defined(SCOPE_REPAIR)
case SCOPE_CODE:
{
for (int i = 0; i < scope_stack_top; i++)
{
error.Report(0, repair.code,
-scope_index[i],
location_stack[scope_position[i]],
prevtok,
non_terminal_index[scope_lhs[scope_index[i]]]);
}
repair.symbol = scope_lhs[scope_index[scope_stack_top]] + NT_OFFSET;
state_stack_top = scope_position[scope_stack_top];
error.Report(0, repair.code,
-scope_index[scope_stack_top],
location_stack[scope_position[scope_stack_top]],
prevtok,
GetNtermIndex(stack[state_stack_top],
repair.symbol,
repair.buffer_position)
);
break;
}
#endif
default: /* deletion */
{
error.Report(0, repair.code, terminal_index[ERROR_SYMBOL],
Loc(curtok), curtok);
}
}
/*****************************************************************/
/* Update buffer. */
/*****************************************************************/
switch (repair.code)
{
case INSERTION_CODE: case BEFORE_CODE:
#if defined(SCOPE_REPAIR)
case SCOPE_CODE:
#endif
{
candidate.symbol = repair.symbol;
candidate.location = Loc(buffer[repair.buffer_position]);
lex_stream -> Reset(buffer[repair.buffer_position]);
break;
}
case INVALID_CODE: case SUBSTITUTION_CODE:
{
candidate.symbol = repair.symbol;
candidate.location = Loc(buffer[repair.buffer_position]);
lex_stream -> Reset(buffer[repair.buffer_position + 1]);
break;
}
case MERGE_CODE:
{
candidate.symbol = repair.symbol;
candidate.location = Loc(buffer[repair.buffer_position]);
lex_stream -> Reset(buffer[repair.buffer_position + 2]);
break;
}
default: /* deletion */
{
candidate.location = Loc(buffer[repair.buffer_position + 1]);
candidate.symbol =
lex_stream -> Kind(buffer[repair.buffer_position + 1]);
lex_stream -> Reset(buffer[repair.buffer_position + 2]);
break;
}
}
return candidate;
}
/*****************************************************************/
/* This function takes as parameter an integer STACK_TOP that */
/* points to a STACK element containing the state on which a */
/* primary recovery will be made; the terminal candidate on which*/
/* to recover; and an integer: buffer_position, which points to */
/* the position of the next input token in the BUFFER. The */
/* parser is simulated until a shift (or shift-reduce) action */
/* is computed on the candidate. Then we proceed to compute the */
/* the name index of the highest level nonterminal that can */
/* directly or indirectly produce the candidate. */
/*****************************************************************/
int DiagnoseParser::GetTermIndex(int stck[], int stack_top,
int tok, int buffer_position)
{
/*****************************************************************/
/* Initialize stack index of temp_stack and initialize maximum */
/* position of state stack that is still useful. */
/*****************************************************************/
int act = stck[stack_top],
max_pos = stack_top,
highest_symbol = tok;
temp_stack_top = stack_top - 1;
/*****************************************************************/
/* Compute all reduce and associated actions induced by the */
/* candidate until a SHIFT or SHIFT-REDUCE is computed. ERROR */
/* and ACCEPT actions cannot be computed on the candidate in */
/* this context, since we know that it is suitable for recovery. */
/*****************************************************************/
lex_stream -> Reset(buffer[buffer_position]);
act = t_action(act, tok, lex_stream);
while(act <= NUM_RULES)
{
/*********************************************************/
/* Process all goto-reduce actions following reduction, */
/* until a goto action is computed ... */
/*********************************************************/
do
{
temp_stack_top -= (rhs[act]-1);
int lhs_symbol = lhs[act];
act = (temp_stack_top > max_pos
? temp_stack[temp_stack_top]
: stck[temp_stack_top]);
act = nt_action(act, lhs_symbol);
} while(act <= NUM_RULES);
/*********************************************************/
/* Compute new maximum useful position of (STATE_)stack, */
/* push goto state into the stack, and compute next */
/* action on candidate ... */
/*********************************************************/
max_pos = Min(max_pos, temp_stack_top);
temp_stack[temp_stack_top + 1] = act;
act = t_action(act, tok, lex_stream);
}
/********************************************************************/
/* At this stage, we have simulated all actions induced by the */
/* candidate and we are ready to shift or shift-reduce it. First, */
/* set tok and next_ptr appropriately and identify the candidate */
/* as the initial highest_symbol. If a shift action was computed */
/* on the candidate, update the stack and compute the next */
/* action. Next, simulate all actions possible on the next input */
/* token until we either have to shift it or are about to reduce */
/* below the initial starting point in the stack (indicated by */
/* max_pos as computed in the previous loop). At that point, */
/* return the highest_symbol computed. */
/********************************************************************/
temp_stack_top++; /* adjust top of stack to reflect last goto */
/* next move is shift or shift-reduce. */
int threshold = temp_stack_top;
tok = lex_stream -> Kind(buffer[buffer_position]);
lex_stream -> Reset(buffer[buffer_position + 1]);
if (act > ERROR_ACTION) /* shift-reduce on candidate? */
act -= ERROR_ACTION;
else /* shift on candidate ! */
{
temp_stack[temp_stack_top + 1] = act;
act = t_action(act, tok, lex_stream);
}
while(act <= NUM_RULES)
{
/*********************************************************/
/* Process all goto-reduce actions following reduction, */
/* until a goto action is computed ... */
/*********************************************************/
do
{
temp_stack_top -= (rhs[act]-1);
if (temp_stack_top < threshold)
goto quit;
int lhs_symbol = lhs[act];
if (temp_stack_top == threshold)
highest_symbol = lhs_symbol + NT_OFFSET;
act = (temp_stack_top > max_pos
? temp_stack[temp_stack_top]
: stck[temp_stack_top]);
act = nt_action(act, lhs_symbol);
} while(act <= NUM_RULES);
temp_stack[temp_stack_top + 1] = act;
act = t_action(act, tok, lex_stream);
}
quit:
return (highest_symbol > NT_OFFSET
? non_terminal_index[highest_symbol - NT_OFFSET]
: terminal_index[highest_symbol]);
}
/*****************************************************************/
/* This function takes as parameter a starting state number: */
/* start, a nonterminal symbol, A (candidate), and an integer, */
/* buffer_position, which points to the position of the next */
/* input token in the BUFFER. */
/* It returns the highest level non-terminal B such that */
/* B =>*rm A. I.e., there does not exists a nonterminal C such */
/* that C =>+rm B. (Recall that for an LALR(k) grammar if */
/* C =>+rm B, it cannot be the case that B =>+rm C) */
/*****************************************************************/
int DiagnoseParser::GetNtermIndex(int start, int sym, int buffer_position)
{
int act,
tok,
highest_symbol;
highest_symbol = sym - NT_OFFSET;
tok = lex_stream -> Kind(buffer[buffer_position]);
lex_stream -> Reset(buffer[buffer_position + 1]);
/*****************************************************************/
/* Initialize stack index of temp_stack and initialize maximum */
/* position of state stack that is still useful. */
/*****************************************************************/
temp_stack_top = 0;
temp_stack[temp_stack_top] = start;
act = nt_action(start, highest_symbol);
if (act > NUM_RULES) /* goto action? */
{
temp_stack[temp_stack_top + 1] = act;
act = t_action(act, tok, lex_stream);
}
while(act <= NUM_RULES)
{
/*********************************************************/
/* Process all goto-reduce actions following reduction, */
/* until a goto action is computed ... */
/*********************************************************/
do
{
temp_stack_top -= (rhs[act]-1);
if (temp_stack_top < 0)
return non_terminal_index[highest_symbol];
if (temp_stack_top == 0)
highest_symbol = lhs[act];
act = nt_action(temp_stack[temp_stack_top], lhs[act]);
} while(act <= NUM_RULES);
temp_stack[temp_stack_top + 1] = act;
act = t_action(act, tok, lex_stream);
}
return non_terminal_index[highest_symbol];
}
/*****************************************************************/
/* Check whether or not there is a high probability that a */
/* given string is a misspelling of another. */
/* Certain singleton symbols (such as ":" and ";") are also */
/* considered to be misspelling of each other. */
/*****************************************************************/
int DiagnoseParser::Misspell(int sym, TokenObject tok)
{
int len = name_length[terminal_index[sym]];
wchar_t *keyword = new wchar_t[len + 1];
for (int i = name_start[terminal_index[sym]], j = 0; j < len; i++, j++)
{
wchar_t c = string_buffer[i];
#ifdef EBCDIC
c = Code::ToASCII(c);
#endif
keyword[j] = c;
}
keyword[len] = U_NULL;
int index = Spell::Index(lex_stream -> Name(tok), keyword);
delete [] keyword;
return index;
}
/*****************************************************************/
/*****************************************************************/
PrimaryRepairInfo DiagnoseParser::ScopeTrial(int stck[], int stack_top,
PrimaryRepairInfo repair)
{
state_seen = new int[stack_length];
for (int i = 0; i < stack_length; i++)
state_seen[i] = NIL;
ScopeTrialCheck(stck, stack_top, repair, 0);
delete [] state_seen;
state_pool.Reset();
repair.code = SCOPE_CODE;
repair.misspell_index = 10;
return repair;
}
/*****************************************************************/
/* SCOPE_TRIAL_CHECK is a recursive procedure that takes as arguments */
/* a state configuration: STACK and STACK_TOP, and an integer: */
/* INDX. In addition, a global variable SCOPE_DISTANCE indicates */
/* the distance to "beat" and SCOPE_BUFFER_POSITION identifies */
/* the starting position of the next input tokens in BUFFER. */
/* SCOPE_TRIAL determines whether or not scope recovery is */
/* possible. If so, it uses two global arrays: SCOPE_INDEX and */
/* SCOPE_LOCATION to store the necessary information about the */
/* scopes to be closed. A global variable, scope_stack_top, identifies */
/* the highest element used in these arrays. */
/* The global variable SCOPE_DISTANCE is also updated to reflect */
/* the new result. */
/*****************************************************************/
void DiagnoseParser::ScopeTrialCheck(int stck[], int stack_top,
PrimaryRepairInfo& repair, int indx)
{
int max_pos,
marked_pos,
stack_position,
distance,
previous_distance,
i,
j,
k,
act,
tok,
lhs_symbol;
act = stck[stack_top];
for (i = state_seen[stack_top]; i != NIL; i = state_pool[i].next)
if (state_pool[i].state == act)
return;
i = state_pool.NextIndex();
state_pool[i].state = act;
state_pool[i].next = state_seen[stack_top];
state_seen[stack_top] = i;
for (i = 0; i < SCOPE_SIZE; i++)
{
/*********************************************************/
/* Use the scope lookahead symbol to force all reductions*/
/* inducible by that symbol. */
/*********************************************************/
act = stck[stack_top];
temp_stack_top = stack_top - 1;
max_pos = stack_top;
tok = scope_la[i];
lex_stream -> Reset(buffer[repair.buffer_position]);
act = t_action(act, tok, lex_stream);
while(act <= NUM_RULES)
{
/*************************************************/
/* ... Process all goto-reduce actions following */
/* reduction, until a goto action is computed ...*/
/*************************************************/
do
{
temp_stack_top -= (rhs[act]-1);
lhs_symbol = lhs[act];
act = (temp_stack_top > max_pos
? temp_stack[temp_stack_top]
: stck[temp_stack_top]);
act = nt_action(act, lhs_symbol);
} while(act <= NUM_RULES);
if (temp_stack_top + 1 >= stack_length)
return;
max_pos = Min(max_pos, temp_stack_top);
temp_stack[temp_stack_top + 1] = act;
act = t_action(act, tok, lex_stream);
}
/*********************************************************/
/* If the lookahead symbol is parsable, then we check */
/* whether or not we have a match between the scope */
/* prefix and the transition symbols corresponding to */
/* the states on top of the stack. */
/*********************************************************/
if (act != ERROR_ACTION)
{
k = scope_prefix[i];
for (j = temp_stack_top + 1;
j >= (max_pos + 1) &&
in_symbol(temp_stack[j]) == scope_rhs[k]; j--)
k++;
if (j == max_pos)
for (j = max_pos;
j >= 1 && in_symbol(stck[j]) == scope_rhs[k];
j--)
k++;
/*****************************************************/
/* If the prefix matches, check whether the state */
/* newly exposed on top of the stack, (after the */
/* corresponding prefix states are popped from the */
/* stack), is in the set of "source states" for the */
/* scope in question and that it is at a position */
/* below the threshold indicated by MARKED_POS. */
/*****************************************************/
marked_pos = (max_pos < stack_top ? max_pos + 1
: stack_top);
if (scope_rhs[k] == 0 && j < marked_pos) /* match? */
{
stack_position = j;
for (j = scope_state_set[i];
stck[stack_position] != scope_state[j] &&
scope_state[j] != 0;
j++);
/*************************************************/
/* If the top state is valid for scope recovery, */
/* the left-hand side of the scope is used as */
/* starting symbol and we calculate how far the */
/* parser can advance within the forward context */
/* after parsing the left-hand symbol. */
/*************************************************/
if (scope_state[j] != 0) /* state was found */
{
previous_distance = repair.distance;
distance = ParseCheck(stck,
stack_position,
scope_lhs[i]+NT_OFFSET,
repair.buffer_position);
/*********************************************/
/* if the recovery is not successful, we */
/* update the stack with all actions induced */
/* by the left-hand symbol, and recursively */
/* call SCOPE_TRIAL_CHECK to try again. */
/* Otherwise, the recovery is successful. If */
/* the new distance is greater than the */
/* initial SCOPE_DISTANCE, we update */
/* SCOPE_DISTANCE and set scope_stack_top to INDX */
/* to indicate the number of scopes that are */
/* to be applied for a succesful recovery. */
/* NOTE that this procedure cannot get into */
/* an infinite loop, since each prefix match */
/* is guaranteed to take us to a lower point */
/* within the stack. */
/*********************************************/
if ((distance - repair.buffer_position + 1) <
MIN_DISTANCE)
{
int top = stack_position;
act = nt_action(stck[top], scope_lhs[i]);
while(act <= NUM_RULES)
{
top -= (rhs[act]-1);
act = nt_action(stck[top], lhs[act]);
}
top++;
j = act;
act = stck[top]; /* save */
stck[top] = j; /* swap */
ScopeTrialCheck(stck, top, repair, indx+1);
stck[top] = act; /* restore */
}
else if (distance > repair.distance)
{
scope_stack_top = indx;
repair.distance = distance;
}
if (lex_stream -> Kind(buffer[repair.buffer_position]) ==
EOFT_SYMBOL &&
repair.distance == previous_distance)
{
scope_stack_top = indx;
repair.distance = MAX_DISTANCE;
}
/*********************************************/
/* If this scope recovery has beaten the */
/* previous distance, then we have found a */
/* better recovery (or this recovery is one */
/* of a list of scope recoveries). Record */
/* its information at the proper location */
/* (INDX) in SCOPE_INDEX and SCOPE_STACK. */
/*********************************************/
if (repair.distance > previous_distance)
{
scope_index[indx] = i;
scope_position[indx] = stack_position;
return;
}
}
}
}
}
return;
}
/*****************************************************************/
/* This function computes the ParseCheck distance for the best */
/* possible secondary recovery for a given configuration that */
/* either deletes none or only one symbol in the forward context.*/
/* If the recovery found is more effective than the best primary */
/* recovery previously computed, then the function returns true. */
/* Only misplacement, scope and manual recoveries are attempted; */
/* simple insertion or substitution of a nonterminal are tried */
/* in CHECK_PRIMARY_DISTANCE as part of primary recovery. */
/*****************************************************************/
bool DiagnoseParser::SecondaryCheck(int stck[], int stack_top,
int buffer_position, int distance)
{
PrimaryRepairInfo repair;
int top,
j;
for (top = stack_top - 1; top >= 0; top--)
{
j = ParseCheck(stck, top,
lex_stream -> Kind(buffer[buffer_position]),
buffer_position + 1);
if (((j - buffer_position + 1) > MIN_DISTANCE) &&
(j > distance))
return true;
}
#if defined(SCOPE_REPAIR)
repair.buffer_position = buffer_position + 1;
repair.distance = distance;
repair = ScopeTrial(stck, stack_top, repair);
if ((repair.distance - buffer_position) > MIN_DISTANCE &&
repair.distance > distance)
return true;
//#elif !defined(SCOPE_REPAIR)
// manual_buffer_position = buffer_position + 1;
// manual_distance = distance;
// manual_trial(stck, stack_top);
// if ((manual_distance - repair.buffer_position) > MIN_DISTANCE &&
// manual_distance > distance)
// return true;
#endif
return false;
}
/*****************************************************************/
/* Secondary_phase is a boolean function that checks whether or */
/* not some form of secondary recovery is applicable to one of */
/* the error configurations. First, if "next_stack" is available,*/
/* misplacement and secondary recoveries are attempted on it. */
/* Then, in any case, these recoveries are attempted on "stack". */
/* If a successful recovery is found, a diagnosis is issued, the */
/* configuration is updated and the function returns "true". */
/* Otherwise, the function returns false. */
/*****************************************************************/
RepairCandidate DiagnoseParser::SecondaryPhase(TokenObject error_token)
{
SecondaryRepairInfo repair,
misplaced;
RepairCandidate candidate;
int i,
j,
k,
top,
next_last_index,
last_index;
candidate.symbol = 0;
repair.code = 0;
repair.distance = 0;
repair.recovery_on_next_stack = false;
misplaced.distance = 0;
misplaced.recovery_on_next_stack = false;
/*****************************************************************/
/* If the next_stack is available, try misplaced and secondary */
/* recovery on it first. */
/*****************************************************************/
if (next_stack_top >= 0)
{
Location save_location;
buffer[2] = error_token;
buffer[1] = lex_stream -> Previous(buffer[2]);
buffer[0] = lex_stream -> Previous(buffer[1]);
for (k = 3; k < BUFF_UBOUND; k++)
buffer[k] = lex_stream -> Next(buffer[k - 1]);
buffer[BUFF_UBOUND] = lex_stream -> Badtoken();/* elmt not available */
/*********************************************************/
/* If we are at the end of the input stream, compute the */
/* index position of the first EOFT symbol (last useful */
/* index). */
/*********************************************************/
for (next_last_index = MAX_DISTANCE - 1;
next_last_index >= 1 &&
lex_stream -> Kind(buffer[next_last_index]) == EOFT_SYMBOL;
next_last_index--);
next_last_index = next_last_index + 1;
save_location = location_stack[next_stack_top];
location_stack[next_stack_top] = Loc(buffer[2]);
misplaced.num_deletions = next_stack_top;
misplaced = MisplacementRecovery(next_stack, next_stack_top,
next_last_index,
misplaced, true);
if (misplaced.recovery_on_next_stack)
misplaced.distance++;
repair.num_deletions = next_stack_top + BUFF_UBOUND;
repair = SecondaryRecovery(next_stack, next_stack_top,
next_last_index,
repair, true);
if (repair.recovery_on_next_stack)
repair.distance++;
location_stack[next_stack_top] = save_location;
}
else /* next_stack not available, initialize ... */
{
misplaced.num_deletions = state_stack_top;
repair.num_deletions = state_stack_top + BUFF_UBOUND;
}
/*****************************************************************/
/* Try secondary recovery on the "stack" configuration. */
/*****************************************************************/
buffer[3] = error_token;
buffer[2] = lex_stream -> Previous(buffer[3]);
buffer[1] = lex_stream -> Previous(buffer[2]);
buffer[0] = lex_stream -> Previous(buffer[1]);
for (k = 4; k < BUFF_SIZE; k++)
buffer[k] = lex_stream -> Next(buffer[k - 1]);
for (last_index = MAX_DISTANCE - 1;
last_index >= 1 && lex_stream -> Kind(buffer[last_index]) == EOFT_SYMBOL;
last_index--);
last_index++;
misplaced = MisplacementRecovery(stack, state_stack_top,
last_index,
misplaced, false);
repair = SecondaryRecovery(stack, state_stack_top,
last_index, repair, false);
/*****************************************************************/
/* If a successful misplaced recovery was found, compare it with */
/* the most successful secondary recovery. If the misplaced */
/* recovery either deletes fewer symbols or parse-checks further */
/* then it is chosen. */
/*****************************************************************/
if (misplaced.distance > MIN_DISTANCE)
{
if (misplaced.num_deletions <= repair.num_deletions ||
(misplaced.distance - misplaced.num_deletions) >=
(repair.distance - repair.num_deletions))
{
repair.code = MISPLACED_CODE;
repair.stack_position = misplaced.stack_position;
repair.buffer_position = 2;
repair.num_deletions = misplaced.num_deletions;
repair.distance = misplaced.distance;
repair.recovery_on_next_stack = misplaced.recovery_on_next_stack;
}
}
/*****************************************************************/
/* If the successful recovery was on next_stack, update: stack, */
/* buffer, location_stack and last_index. */
/*****************************************************************/
if (repair.recovery_on_next_stack)
{
state_stack_top = next_stack_top;
for (i = 0; i <= state_stack_top; i++)
stack[i] = next_stack[i];
buffer[2] = error_token;
buffer[1] = lex_stream -> Previous(buffer[2]);
buffer[0] = lex_stream -> Previous(buffer[1]);
for (k = 3; k < BUFF_UBOUND; k++)
buffer[k] = lex_stream -> Next(buffer[k - 1]);
buffer[BUFF_UBOUND] = lex_stream -> Badtoken();/* elmt not available */
location_stack[next_stack_top] = Loc(buffer[2]);
last_index = next_last_index;
}
#if defined(SCOPE_REPAIR)
/*****************************************************************/
/* Next, try scope recoveries after deletion of one, two, three, */
/* four ... buffer_position tokens from the input stream. */
/*****************************************************************/
if (repair.code == SECONDARY_CODE ||
repair.code == DELETION_CODE)
{
PrimaryRepairInfo scope_repair;
scope_repair.distance = 0;
for (scope_repair.buffer_position = 2;
scope_repair.buffer_position <= repair.buffer_position &&
repair.code != SCOPE_CODE; scope_repair.buffer_position++)
{
scope_repair = ScopeTrial(stack, state_stack_top, scope_repair);
j = (scope_repair.distance == MAX_DISTANCE
? last_index
: scope_repair.distance);
k = scope_repair.buffer_position - 1;
if ((j - k) > MIN_DISTANCE &&
(j - k) > (repair.distance - repair.num_deletions))
{
repair.code = SCOPE_CODE;
i = scope_index[scope_stack_top]; /* upper bound */
repair.symbol = scope_lhs[i] + NT_OFFSET;
repair.stack_position = state_stack_top;
repair.buffer_position = scope_repair.buffer_position;
}
}
}
/*****************************************************************/
/* If no successful recovery is found and we have reached the */
/* end of the file, check whether or not scope recovery is */
/* applicable at the end of the file after discarding some */
/* states. */
/*****************************************************************/
if (repair.code == 0 &&
lex_stream -> Kind(buffer[last_index]) == EOFT_SYMBOL)
{
PrimaryRepairInfo scope_repair;
scope_repair.buffer_position = last_index;
scope_repair.distance = 0;
for (top = state_stack_top;
top >= 0 && repair.code == 0; top--)
{
scope_repair = ScopeTrial(stack, top, scope_repair);
if (scope_repair.distance > 0)
{
repair.code = SCOPE_CODE;
i = scope_index[scope_stack_top]; /* upper bound */
repair.symbol = scope_lhs[i] + NT_OFFSET;
repair.stack_position = top;
repair.buffer_position = scope_repair.buffer_position;
}
}
}
#endif
/*****************************************************************/
/* If a successful repair was not found, quit! Otherwise, issue */
/* diagnosis and adjust configuration... */
/*****************************************************************/
if (repair.code == 0)
return candidate;
SecondaryDiagnosis(repair);
/*****************************************************************/
/* Update buffer based on number of elements that are deleted. */
/*****************************************************************/
switch(repair.code)
{
case MANUAL_CODE: case MISPLACED_CODE:
candidate.location = Loc(buffer[2]);
candidate.symbol = lex_stream -> Kind(buffer[2]);
lex_stream -> Reset(lex_stream -> Next(buffer[2]));
break;
case DELETION_CODE:
candidate.location = Loc(buffer[repair.buffer_position]);
candidate.symbol =
lex_stream -> Kind(buffer[repair.buffer_position]);
lex_stream -> Reset(lex_stream -> Next(buffer[repair.buffer_position]));
break;
default: /* SCOPE_CODE || SECONDARY_CODE */
candidate.symbol = repair.symbol;
candidate.location = Loc(buffer[repair.buffer_position]);
lex_stream -> Reset(buffer[repair.buffer_position]);
break;
}
return candidate;
}
/*****************************************************************/
/* This boolean function checks whether or not a given */
/* configuration yields a better misplacement recovery than */
/* the best misplacement recovery computed previously. */
/*****************************************************************/
SecondaryRepairInfo DiagnoseParser::MisplacementRecovery
(int stck[], int stack_top, int last_index,
SecondaryRepairInfo repair, bool stack_flag)
{
Location previous_loc = Loc(buffer[2]);
int stack_deletions = 0;
for (int top = stack_top - 1; top >= 0; top--)
{
if (location_stack[top] < previous_loc)
stack_deletions++;
previous_loc = location_stack[top];
int j = ParseCheck(stck, top, lex_stream -> Kind(buffer[2]), 3);
if (j == MAX_DISTANCE)
j = last_index;
if ((j > MIN_DISTANCE) &&
(j - stack_deletions) >
(repair.distance - repair.num_deletions))
{
repair.stack_position = top;
repair.distance = j;
repair.num_deletions = stack_deletions;
repair.recovery_on_next_stack = stack_flag;
}
}
return repair;
}
/*****************************************************************/
/* This boolean function checks whether or not a given */
/* configuration yields a better secondary recovery than the */
/* best misplacement recovery computed previously. */
/*****************************************************************/
SecondaryRepairInfo DiagnoseParser::SecondaryRecovery
(int stck[],int stack_top,
int last_index, SecondaryRepairInfo repair, bool stack_flag)
{
int i,
j,
k,
l,
symbol,
stack_deletions,
top;
Location previous_loc;
stack_deletions = 0;
previous_loc = Loc(buffer[2]);
for (top = stack_top;
top >= 0 && repair.num_deletions >= stack_deletions; top--)
{
if (location_stack[top] < previous_loc)
stack_deletions++;
previous_loc = location_stack[top];
for (i = 2;
i <= (last_index - MIN_DISTANCE + 1) &&
(repair.num_deletions >= (stack_deletions + i - 1)); i++)
{
j = ParseCheck(stck, top, lex_stream -> Kind(buffer[i]), i + 1);
if (j == MAX_DISTANCE)
j = last_index;
if ((j - i + 1) > MIN_DISTANCE)
{
k = stack_deletions + i - 1;
if ((k < repair.num_deletions) ||
(j - k) > (repair.distance - repair.num_deletions) ||
((repair.code == SECONDARY_CODE) &&
(j - k) == (repair.distance -
repair.num_deletions)))
{
repair.code = DELETION_CODE;
repair.distance = j;
repair.stack_position = top;
repair.buffer_position = i;
repair.num_deletions = k;
repair.recovery_on_next_stack = stack_flag;
}
}
for (l = nasi(stck[top]); l >= 0 && nasr[l] != 0; l++)
{
symbol = nasr[l] + NT_OFFSET;
j = ParseCheck(stck, top, symbol, i);
if (j == MAX_DISTANCE)
j = last_index;
if ((j - i + 1) > MIN_DISTANCE)
{
k = stack_deletions + i - 1;
if (k < repair.num_deletions ||
(j - k) > (repair.distance -
repair.num_deletions))
{
repair.code = SECONDARY_CODE;
repair.symbol = symbol;
repair.distance = j;
repair.stack_position = top;
repair.buffer_position = i;
repair.num_deletions = k;
repair.recovery_on_next_stack = stack_flag;
}
}
}
}
}
return repair;
}
/*****************************************************************/
/* This procedure is invoked to issue a secondary diagnosis and */
/* adjust the input buffer. The recovery in question is either */
/* an automatic scope recovery, a manual scope recovery, a */
/* secondary substitution or a secondary deletion. */
/*****************************************************************/
void DiagnoseParser::SecondaryDiagnosis(SecondaryRepairInfo repair)
{
/*****************************************************************/
/* Issue diagnostic. */
/*****************************************************************/
switch(repair.code)
#if defined(SCOPE_REPAIR)
{
case SCOPE_CODE:
{
if (repair.stack_position < state_stack_top)
{
error.Report(0, DELETION_CODE,
terminal_index[ERROR_SYMBOL],
location_stack[repair.stack_position],
buffer[1]);
// set_location(buffer[1],
// location_stack[repair.stack_position]);
}
for (int i = 0; i < scope_stack_top; i++)
{
error.Report(0, SCOPE_CODE,
-scope_index[i],
location_stack[scope_position[i]],
buffer[1],
non_terminal_index[scope_lhs[scope_index[i]]]);
}
repair.symbol = scope_lhs[scope_index[scope_stack_top]] + NT_OFFSET;
state_stack_top = scope_position[scope_stack_top];
error.Report(0, SCOPE_CODE,
-scope_index[scope_stack_top],
location_stack[scope_position[scope_stack_top]],
buffer[1],
GetNtermIndex(stack[state_stack_top],
repair.symbol,
repair.buffer_position)
);
break;
}
#endif
default:
{
error.Report(0, repair.code,
(repair.code == SECONDARY_CODE
? GetNtermIndex(stack[repair.stack_position],
repair.symbol,
repair.buffer_position)
: terminal_index[ERROR_SYMBOL]),
location_stack[repair.stack_position],
buffer[repair.buffer_position - 1]);
state_stack_top = repair.stack_position;
}
}
return;
}
//
// This procedure uses a quick sort algorithm to sort the ERRORS
// by the left_line_no and left_column_no fields.
//
void ParseError::SortMessages()
{
int lower,
upper,
lostack[32],
histack[32];
int top,
i,
j;
ErrorInfo pivot, temp;
top = 0;
lostack[top] = 0;
histack[top] = errors.Length() - 1;
while(top >= 0)
{
lower = lostack[top];
upper = histack[top];
top--;
while(upper > lower)
{
/*********************************************************/
/* The array is most-likely almost sorted. Therefore, */
/* we use the middle element as the pivot element. */
/*********************************************************/
i = (lower + upper) / 2;
pivot = errors[i];
errors[i] = errors[lower];
/*********************************************************/
/* Split the array section indicated by LOWER and UPPER */
/* using ARRAY(LOWER) as the pivot. */
/*********************************************************/
i = lower;
for (j = lower + 1; j <= upper; j++)
if ((errors[j].left_token < pivot.left_token) ||
/*****************************************************/
/* When two error messages start in the same location*/
/* and one is nested inside the other, the outer one */
/* is placed first so that it can be printed last. */
/* Recall that its right-span location is reached */
/* after the inner one has been completely processed.*/
/*****************************************************/
(errors[j].left_token == pivot.left_token &&
errors[j].right_token > pivot.right_token) ||
/*****************************************************/
/* When two error messages are at the same location */
/* span, check the NUM field to keep the sort stable.*/
/* When the location spans only a single symbol, */
/* the one with the lowest "num" is placed first. */
/*****************************************************/
(errors[j].left_token == pivot.left_token &&
errors[j].right_token == pivot.right_token &&
pivot.left_token == pivot.right_token &&
errors[j].num < pivot.num) ||
/*****************************************************/
/* When two error messages are at the same location */
/* which spans more than one symbol in the source, */
/* the first message is treated as been nested into */
/* the second message and (just like the nested case */
/* above) it is placed last in the sorted sequence. */
/*****************************************************/
(errors[j].left_token == pivot.left_token &&
errors[j].right_token == pivot.right_token &&
pivot.left_token < pivot.right_token &&
errors[j].num > pivot.num))
{
temp = errors[++i];
errors[i] = errors[j];
errors[j] = temp;
}
errors[lower] = errors[i];
errors[i] = pivot;
top++;
if ((i - lower) < (upper - i))
{
lostack[top] = i + 1;
histack[top] = upper;
upper = i - 1;
}
else
{
histack[top] = i - 1;
lostack[top] = lower;
lower = i + 1;
}
}
}
return;
}
/*****************************************************************/
/* This procedure is invoked by an JIKES PARSER or a semantic */
/* routine to process an error message. The JIKES parser always */
/* passes the value 0 to msg_level to indicate an error. */
/* This routine simply stores all necessary information about */
/* the message into an array: error. */
/*****************************************************************/
void ParseError::Report(int msg_level,
int msg_code,
int name_index,
LexStream::TokenIndex left_token,
LexStream::TokenIndex right_token,
int scope_name_index)
{
int i = errors.NextIndex();
errors[i].msg_level = msg_level;
errors[i].msg_code = msg_code;
errors[i].name_index = name_index;
errors[i].num = i;
errors[i].left_token = (left_token > Loc(right_token) ? Loc(right_token) : left_token);
errors[i].right_token = Loc(right_token);
errors[i].right_string_length = lex_stream -> NameLength(right_token);
errors[i].scope_name_index = scope_name_index;
if (control.option.dump_errors)
{
if (errors[i].left_token < errors[i].right_token)
PrintSecondaryMessage(i);
else PrintPrimaryMessage(i);
errors.Reset(1); // we only need to indicate that at least one error was detected... See print_messages
cout.flush();
}
return;
}
/*****************************************************************/
/* This procedure is invoked to print JIKES error messages that */
/* have been stored in the array: error. */
/*****************************************************************/
void ParseError::PrintMessages()
{
if (control.option.dump_errors || errors.Length() == 0)
{
cout.flush();
return;
}
if (control.option.errors)
{
cout << "\nFound " << errors.Length() << " syntax error" << (errors.Length() == 1 ? "" : "s")
<< " in \"";
Unicode::Cout(lex_stream -> FileName());
cout << "\":";
lex_stream -> RereadInput();
if (! lex_stream -> InputBuffer())
{
char *file_name = lex_stream -> FileName();
int length = lex_stream -> FileNameLength();
wchar_t *name = new wchar_t[length + 1];
for (int i = 0; i < length; i++)
name[i] = file_name[i];
name[length] = U_NULL;
control.system_semantic -> ReportSemError(SemanticError::CANNOT_REOPEN_FILE,
0,
0,
name);
delete [] name;
return;
}
}
else if (! lex_stream -> ComputeColumns())
{
char *file_name = lex_stream -> FileName();
int length = lex_stream -> FileNameLength();
wchar_t *name = new wchar_t[length + 1];
for (int i = 0; i < length; i++)
name[i] = file_name[i];
name[length] = U_NULL;
control.system_semantic -> ReportSemError(SemanticError::CANNOT_COMPUTE_COLUMNS,
0,
0,
name);
delete [] name;
}
SortMessages();
int stack_top = -1;
int *error_stack = new int[errors.Length()];
for (int k = 0; k < errors.Length(); k++)
{
int left_line_no = lex_stream -> Line(errors[k].left_token),
left_column_no = lex_stream -> Column(errors[k].left_token),
right_column_no = lex_stream -> Column(errors[k].right_token),
msg_code = errors[k].msg_code;
Location left_token = errors[k].left_token;
for (; stack_top >= 0 && (errors[error_stack[stack_top]].right_token <= left_token); stack_top--)
{
if (stack_top == 0)
PrintLargeMessage(error_stack[stack_top]);
else
{
int i = error_stack[stack_top],
j = error_stack[stack_top - 1];
if ((errors[i].msg_code != SECONDARY_CODE &&
errors[i].msg_code != MISPLACED_CODE &&
errors[i].msg_code != DELETION_CODE) ||
(errors[j].msg_code != DELETION_CODE &&
errors[j].msg_code != MISPLACED_CODE &&
errors[j].msg_code != SECONDARY_CODE))
PrintLargeMessage(i);
}
}
if (errors[k].left_token < errors[k].right_token)
{
stack_top++;
error_stack[stack_top] = k;
}
else if ((stack_top >= 0) &&
(errors[error_stack[stack_top]].msg_code ==
SECONDARY_CODE ||
errors[error_stack[stack_top]].msg_code ==
DELETION_CODE) &&
(errors[k].left_token ==
errors[error_stack[stack_top]].left_token ||
errors[k].right_token ==
errors[error_stack[stack_top]].right_token));
else /* NOTE that input_line already contains a '\n' character */
{
if (control.option.errors)
{
int m = left_column_no,
n = right_column_no + errors[k].right_string_length - 1;
cout << "\n\n";
cout.width(6);
cout << left_line_no << ". ";
for (int i = lex_stream -> LineStart(left_line_no); i <= lex_stream -> LineEnd(left_line_no); i++)
Unicode::Cout(lex_stream -> InputBuffer()[i]);
if ((msg_code == SUBSTITUTION_CODE) ||
(n == m) ||
(msg_code == BEFORE_CODE) ||
(msg_code == INVALID_CODE && n <= (m+1)) ||
(msg_code == DELETION_CODE &&
left_column_no == right_column_no))
{
cout.width(left_column_no + 9);
cout << "^\n";
}
else if (msg_code == INSERTION_CODE ||
msg_code == EOF_CODE)
{
cout.width(n + 9);
cout << "^\n";
}
else
{
cout.width(left_column_no + 8);
cout << "<";
cout.width(right_column_no + errors[k].right_string_length - left_column_no);
cout.fill('-');
cout << (msg_code == SCOPE_CODE || msg_code == MANUAL_CODE ? "^\n" : ">\n");
cout.fill(' ');
}
}
PrintPrimaryMessage(k);
}
}
for (; stack_top > 0; stack_top--)
{
int i = error_stack[stack_top],
j = error_stack[stack_top - 1];
if ((errors[i].msg_code != SECONDARY_CODE &&
errors[i].msg_code != DELETION_CODE) ||
(errors[j].msg_code != DELETION_CODE &&
errors[j].msg_code != SECONDARY_CODE))
PrintLargeMessage(i);
}
if (stack_top == 0)
PrintLargeMessage(error_stack[stack_top]);
cout.flush();
delete [] error_stack;
if (control.option.errors)
lex_stream -> DestroyInput();
return;
}
//
// This procedure is invoked to print a large message that may
// span more than one line. The parameter ptr points to the
// starting line. The parameter k points to the error message in
// the error structure.
//
void ParseError::PrintLargeMessage(int k)
{
if (control.option.errors)
{
int left_line_no = lex_stream -> Line(errors[k].left_token),
left_column_no = lex_stream -> Column(errors[k].left_token),
right_line_no = lex_stream -> Line(errors[k].right_token),
right_column_no = lex_stream -> Column(errors[k].right_token);
if (left_line_no == right_line_no)
{
cout << "\n\n";
cout.width(6);
cout << left_line_no << ". ";
for (int i = lex_stream -> LineStart(left_line_no); i <= lex_stream -> LineEnd(left_line_no); i++)
Unicode::Cout(lex_stream -> InputBuffer()[i]);
cout.width(left_column_no + 8);
cout << (errors[k].msg_code == SCOPE_CODE || errors[k].msg_code == MANUAL_CODE ? "^" : "<");
cout.width(right_column_no + errors[k].right_string_length - left_column_no);
cout.fill('-');
cout << (errors[k].msg_code == SCOPE_CODE || errors[k].msg_code == MANUAL_CODE ? "^\n" : ">\n");
cout.fill(' ');
}
else
{
cout << "\n\n";
cout.width(left_column_no + 8);
cout << "<";
cout.width(lex_stream -> LineSegmentLength(errors[k].left_token));
cout.fill('-');
cout << "\n";
cout.fill(' ');
cout.width(6);
cout << left_line_no << ". ";
int i;
for (i = lex_stream -> LineStart(left_line_no); i <= lex_stream -> LineEnd(left_line_no); i++)
Unicode::Cout(lex_stream -> InputBuffer()[i]);
if (right_line_no > left_line_no + 1)
{
cout.width(left_column_no + 7);
cout << " ";
cout << ". . .\n";
}
cout.width(6);
cout << right_line_no << ". ";
for (i = lex_stream -> LineStart(right_line_no); i <= lex_stream -> LineEnd(right_line_no); i++)
Unicode::Cout(lex_stream -> InputBuffer()[i]);
cout.width(8);
cout << "";
cout.width(right_column_no + errors[k].right_string_length);
cout.fill('-');
cout << (errors[k].msg_code == SCOPE_CODE || errors[k].msg_code == MANUAL_CODE ? "^\n" : ">\n");
cout.fill(' ');
}
}
PrintSecondaryMessage(k);
return;
}
//
// This procedure is invoked to form a primary error message. The
// parameter k identifies the error to be processed. The global
// variable: msg, is used to store the message.
//
void ParseError::PrintPrimaryMessage(int k)
{
const char *name;
int i,
len = 0;
#if defined(FULL_DIAGNOSIS)
if (errors[k].name_index >= 0)
{
len = name_length[errors[k].name_index];
name = &string_buffer[name_start[errors[k].name_index]];
}
#endif
if (! control.option.errors)
{
int left_line_no = lex_stream -> Line(errors[k].left_token),
left_column_no = lex_stream -> Column(errors[k].left_token),
right_line_no = lex_stream -> Line(errors[k].right_token),
right_column_no = lex_stream -> Column(errors[k].right_token);
if (right_column_no != 0) // could not compute a column number
right_column_no += (errors[k].right_string_length - 1); // point to last character in right token
Unicode::Cout(lex_stream -> FileName());
cout << ':' << left_line_no << ':' << left_column_no
<< ':' << right_line_no << ':' << right_column_no
<< ":\n Syntax: ";
}
else cout << "*** Syntax Error: ";
switch(errors[k].msg_code)
{
case ERROR_CODE:
cout << "Parsing terminated at this token";
break;
case BEFORE_CODE:
for (i = 0; i < len; i++)
cout << name[i];
cout << " inserted before this token";
break;
case INSERTION_CODE:
for (i = 0; i < len; i++)
cout << name[i];
cout << " expected after this token";
break;
case DELETION_CODE:
if (errors[k].left_token == errors[k].right_token)
cout << "Unexpected symbol ignored";
else cout << "Unexpected symbols ignored";
break;
case INVALID_CODE:
if (len == 0)
cout << "Unexpected input discarded";
else
{
cout << "Invalid ";
for (i = 0; i < len; i++)
cout << name[i];
}
break;
case SUBSTITUTION_CODE:
for (i = 0; i < len; i++)
cout << name[i];
cout << " expected instead of this token";
break;
#if defined(SCOPE_REPAIR)
case SCOPE_CODE:
cout << '\"';
for (i = scope_suffix[- (int) errors[k].name_index];
scope_rhs[i] != 0; i++)
{
len = name_length[scope_rhs[i]];
name = &string_buffer[name_start[scope_rhs[i]]];
for (int j = 0; j < len; j++)
cout << name[j];
if (scope_rhs[i+1]) // any more symbols to print?
cout << ' ';
}
cout << '\"';
cout << " inserted to complete ";
//
// TODO: This should not be an option
//
if (errors[k].scope_name_index)
{
len = name_length[errors[k].scope_name_index];
name = &string_buffer[name_start[errors[k].scope_name_index]];
for (int j = 0; j < len; j++) // any more symbols to print?
cout << name[j];
}
else cout << "scope";
break;
#endif
case MANUAL_CODE:
cout << '\"';
for (i = 0; i < len; i++)
cout << name[i];
cout << "\" inserted to complete structure";
break;
case MERGE_CODE:
cout << "symbols merged to form ";
for (i = 0; i < len; i++)
cout << name[i];
break;
case EOF_CODE:
for (i = 0; i < len; i++)
cout << name[i];
cout << " reached after this token";
break;
default:
if (errors[k].msg_code == MISPLACED_CODE)
cout << "misplaced construct(s)";
else if (len == 0)
cout << "unexpected input discarded";
else
{
for (i = 0; i < len; i++)
cout << name[i];
cout << " expected instead";
}
break;
}
cout << '\n';
return;
}
//
// This procedure is invoked to form a secondary error message.
// The parameter k identifies the error to be processed. The
// global variable: msg, is used to store the message.
//
void ParseError::PrintSecondaryMessage(int k)
{
const char *name;
int i,
len = 0;
#if defined(FULL_DIAGNOSIS)
if (errors[k].name_index >= 0)
{
len = name_length[errors[k].name_index];
name = &string_buffer[name_start[errors[k].name_index]];
}
#endif
if (! control.option.errors)
{
int left_line_no = lex_stream -> Line(errors[k].left_token),
left_column_no = lex_stream -> Column(errors[k].left_token),
right_line_no = lex_stream -> Line(errors[k].right_token),
right_column_no = lex_stream -> Column(errors[k].right_token);
if (right_column_no != 0) // could not compute a column number
right_column_no += (errors[k].right_string_length - 1); // point to last character in right token
Unicode::Cout(lex_stream -> FileName());
cout << ':' << left_line_no << ':' << left_column_no
<< ':' << right_line_no << ':' << right_column_no
<< ":\n Syntax: ";
}
else cout << "*** Syntax Error: ";
switch(errors[k].msg_code)
{
case MISPLACED_CODE:
cout << "Misplaced construct(s)";
break;
#if defined(SCOPE_REPAIR)
case SCOPE_CODE:
cout << '\"';
for (i = scope_suffix[- (int) errors[k].name_index]; scope_rhs[i] != 0; i++)
{
len = name_length[scope_rhs[i]];
name = &string_buffer[name_start[scope_rhs[i]]];
for (int j = 0; j < len; j++) // any more symbols to print?
cout << name[j];
if (scope_rhs[i+1])
cout << ' ';
}
cout << "\" inserted to complete ";
//
// TODO: This should not be an option
//
if (errors[k].scope_name_index)
{
len = name_length[errors[k].scope_name_index];
name = &string_buffer[name_start[errors[k].scope_name_index]];
for (int j = 0; j < len; j++) // any more symbols to print?
cout << name[j];
}
else cout << "phrase";
break;
#endif
case MANUAL_CODE:
cout << '\"';
for (i = 0; i < len; i++)
cout << name[i];
cout << "\" inserted to complete structure";
break;
case MERGE_CODE:
cout << "Symbols merged to form ";
for (i = 0; i < len; i++)
cout << name[i];
break;
default:
if (errors[k].msg_code == DELETION_CODE || len == 0)
cout << "Unexpected input discarded";
else
{
for (i = 0; i < len; i++)
cout << name[i];
cout << " expected instead";
}
}
cout << '\n';
return;
}